# 拿到页面源代码————————————————requests
# 通过re来提取我们需要的数据——————re
# csv一种数据格式，数据通过逗号分隔。可以通过这个格式写入到文件中
import re
import requests
import csv

# Douban lists 25 movies per page; the ``start`` query parameter advances by
# this amount from one page to the next.
PAGE_SIZE = 25

# Number of result pages fetched by default.
PAGE_COUNT = 2

# CSV file the rows are appended to.
OUTPUT_PATH = 'douban.csv'

# URL template; ``{}`` receives the zero-based offset of the first movie.
TOP250_URL = 'https://movie.douban.com/top250?start={}&filter='

# A browser-like User-Agent is sent with every request.
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36"
}

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Compiled once at import time instead of once per page.  ``re.S`` lets ``.``
# span newlines because each movie entry covers several lines of HTML.
MOVIE_PATTERN = re.compile(
    r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?...<br>(?P<year>.*?)&nbsp;.*?<span class="rating_num" property="v:average">'
    r'(?P<score>.*?)</span>.*?<span>(?P<comments>.*?)</span>', re.S)


def parse_movies(page_content):
    """Yield one CSV row per movie entry matched in *page_content*.

    Args:
        page_content: HTML text of one result page.

    Yields:
        ``[name, year, score, comments]`` lists of strings, in the order the
        named groups appear in ``MOVIE_PATTERN``.  ``year`` has its
        surrounding whitespace stripped; the other fields are emitted as
        captured.  Nothing is yielded when no entry matches.
    """
    for match in MOVIE_PATTERN.finditer(page_content):
        fields = match.groupdict()
        fields['year'] = fields['year'].strip()
        yield list(fields.values())


def scrape(page_count=PAGE_COUNT, output_path=OUTPUT_PATH):
    """Fetch *page_count* result pages and append their movies to a CSV file.

    Args:
        page_count: Number of consecutive pages to fetch, starting at offset 0.
        output_path: Path of the CSV file; rows are appended, never truncated.

    Raises:
        requests.RequestException: If a request times out, fails, or returns
            an HTTP error status.
        OSError: If the output file cannot be opened or written.
    """
    # One handle for the whole run, closed even if a request fails.
    # ``newline=''`` is required by the csv module (otherwise blank rows appear
    # on Windows); an explicit encoding keeps non-ASCII titles from depending
    # on the locale's default codec.
    with open(output_path, 'a', encoding='utf-8', newline='') as f:
        csvwriter = csv.writer(f)
        for page_index in range(page_count):
            url = TOP250_URL.format(page_index * PAGE_SIZE)
            # The context manager releases every response, not just the last.
            with requests.get(url, headers=REQUEST_HEADERS,
                              timeout=REQUEST_TIMEOUT) as resp:
                # Fail loudly on an HTTP error status instead of silently
                # writing zero rows for that page.
                resp.raise_for_status()
                page_content = resp.text

            for row in parse_movies(page_content):
                print(row)
                csvwriter.writerow(row)


if __name__ == '__main__':
    scrape()

